

******Decisions
*What do we do about ln_emp_ent or ln_ees_ent
**JT note to self, maybe move regression sample to regression file




di "************************************************************************************************************"
di "***************************************LOAD AND LABEL FIRM DATA*********************************************"
di "************************************************************************************************************"

* Read the firm-level panel. The globals saveddata and firmdataname are not set
* in this file and must already be defined when it runs.
use "$saveddata/$firmdataname", clear


noi di "**********************************Sample restrictions**************************************************************" 

* Retain firms born in 2008 or earlier (author's note: later births fall during/after the fiber rollout).
* Stata ranks numeric missing above every number, so rows with a missing
* birth_ent fail this condition and are removed as well.
keep if birth_ent <= 2008
* Retain 2006 onwards; a missing year compares as larger than 2006 and is therefore kept.
keep if year >= 2006


noi di "**********************************Variable generation **************************************************************" 

noi di "Generate measure of firms that move to another location with different expected fiber rollout enablement phase"
* Order the panel by firm and year before applying the lag operator.
sort ruref year
* A missing phase group is recoded to group 18 (author's note: firms whose exchange is never enabled).
replace fibre_group = 18 if fibre_group == .
* Flag firm-years in 2009-2014 whose phase group differs from the lagged one.
* l. is the time-series lag operator, so the data must already be declared as a
* panel (xtset/tsset); that declaration is not made in this file.
bysort ruref: gen mover = 1 if l.fibre_group != . & l.fibre_group != fibre_group & inrange(year, 2009, 2014)
* Spread the flag over all rows of the firm: 1 if the firm was ever flagged, missing otherwise.
bysort ruref: egen ever_move = max(mover)
drop mover

noi di "Generate cloud classifications"
* Every classification starts as the sum of its component dummies; the sums are
* capped at 1 further down so that each result is again a 0/1 indicator
* (missing whenever a component is missing).

*** cloud hardware: own-software processing, file storage, database hosting
gen cloud_hard = cloud_ownsoftware + cloud_storage + cloud_data

*** cloud software: CRM, finance, office software, email
gen cloud_soft = cloud_crm + cloud_finance + cloud_software + cloud_email

* combined cloud data and storage
gen cloud_ds = cloud_data + cloud_storage

*** cloud low tech a la Eurostat (2018): email, office software, file storage
gen cloud_low = cloud_email + cloud_software + cloud_storage

*** cloud medium tech a la Eurostat (2018)
* This is simply the database-hosting dummy: pairing cloud_data==1 with email,
* software or storage can never change its value, so no further replace is needed.
* NOTE(review): unlike cloud_low, this is not set to 0 for firms that also use
* high-tech services - confirm that is intended.
gen cloud_medium = cloud_data

*** cloud high tech a la Eurostat (2018): finance, CRM, own-software processing
gen cloud_high = cloud_finance + cloud_crm + cloud_ownsoftware

* Cap the multi-component sums at 1, leaving missing sums untouched.
foreach cls in cloud_hard cloud_soft cloud_low cloud_high {
    replace `cls' = 1 if `cls' > 1 & `cls' < .
}
replace cloud_ds = 1 if cloud_ds == 2

* Low tech is conditional on using none of the more advanced services.
replace cloud_low = 0 if cloud_data == 1 | cloud_finance == 1 | cloud_crm == 1 | cloud_ownsoftware == 1



noi di "Create additional variables - separately for incumbent and young firms - Use young firm (<=5 in 2008) and incumbent (>5 in 2008)"
* Young: born 2003 or later, i.e. at most 5 years old in 2008. Incumbent is the
* complement. Both are missing when birth_ent is missing.
gen young_08     = (birth_ent >= 2003) if !missing(birth_ent)
gen incumbent_08 = 1 - young_08

* Interact fiber availability, the fiber speed proxy and cloud use with incumbency.
foreach base in fibre_L1 dist_fibre_L1 cloud {
    gen `base'_inc = `base' * incumbent_08
}


noi di "Create additional variables - separately for incumbent and young firms - Use p25 (=10 years old) to define young and incumbent"
* Same split with the cut-off at 10 years: born 1998 or later counts as young.
gen young25_08     = (birth_ent >= 1998) if !missing(birth_ent)
gen incumbent25_08 = 1 - young25_08
gen cloud_inc25 = cloud * incumbent25_08
gen cloud_yng25 = cloud * young25_08



noi di "Create additional variables - separately for firms of different (approximate) age quartiles"
* Four mutually exclusive birth-year bands; each dummy is missing when birth_ent is missing.
gen ageq1_08 = (birth_ent >= 1998)                    if !missing(birth_ent)
gen ageq2_08 = (birth_ent >= 1988 & birth_ent < 1998) if !missing(birth_ent)
gen ageq3_08 = (birth_ent >= 1976 & birth_ent < 1988) if !missing(birth_ent)
gen ageq4_08 = (birth_ent < 1976)                     if !missing(birth_ent)

* Interact fiber availability, the fiber speed proxy and cloud use with each band.
foreach base in fibre_L1 dist_fibre_L1 cloud {
    forvalues q = 1/4 {
        gen `base'_ageq`q'_08 = `base' * ageq`q'_08
    }
}

noi di "Create additional variables - separately for firms of alternative age groups" 
* Alternative birth-year bands: 2003 or later, 1998-2002, 1989-1997, before 1989.
* Each dummy is missing when birth_ent is missing.
gen ageq1A_08 = (birth_ent >= 2003)                    if !missing(birth_ent)
gen ageq2A_08 = (birth_ent >= 1998 & birth_ent < 2003) if !missing(birth_ent)
gen ageq3A_08 = (birth_ent >= 1989 & birth_ent < 1998) if !missing(birth_ent)
gen ageq4A_08 = (birth_ent < 1989)                     if !missing(birth_ent)

* Interact fiber availability, the fiber speed proxy and cloud use with each band.
foreach base in fibre_L1 dist_fibre_L1 cloud {
    forvalues g = 1/4 {
        gen `base'_ageq`g'A_08 = `base' * ageq`g'A_08
    }
}


noi di "Generate mean centered continuous (initial) age interactions"
* Step 1: copy log age from the 2008 row only (missing in every other year).
gen init_ageA=ln_age_ent if year==2008		// initial age
* Step 2: spread the 2008 value to every row that shares the same entref.
* bysort also leaves the data sorted by entref.
* NOTE(review): the grouping key here is entref, whereas the initial-size block
* groups by ruref - confirm the difference is intended.
bysort entref: egen init_age08= max(init_ageA)
drop init_ageA

* summarize leaves the mean in r(mean). It is taken over all rows in memory
* (firm-years), not over one row per firm. The gen line must follow directly,
* before any other command overwrites r().
sum init_age08, detail
gen age_centred= init_age08 -  r(mean) // mean center
* Interactions of cloud use, fiber availability and the fiber speed proxy with centred initial age.
gen treat_age= cloud*age_centred
gen fibre_age =fibre_L1*age_centred
gen dist_fibre_age = dist_fibre_L1 * age_centred
drop init_age08

noi di "Generate mean centered continuous (initial) size interactions"
* Step 1: copy log employees from the 2008 row only (missing in every other year).
gen ln_size_ent_08A=ln_ees_ent if year==2008		// initial size
* Step 2: spread the 2008 value to every row of the same ruref.
* The by prefix needs the data sorted by ruref, hence the explicit sort.
sort ruref
by ruref: egen init_size08_log= max(ln_size_ent_08A)
drop ln_size_ent_08A

* summarize leaves the mean in r(mean). It is taken over all rows in memory
* (firm-years), not over one row per firm. The gen line must follow directly,
* before any other command overwrites r().
sum init_size08_log, detail
gen ees_centred_log= init_size08_log -  r(mean)
* Interactions of cloud use, fiber availability and the fiber speed proxy with centred initial size.
gen treat_ees_log= cloud*ees_centred_log
gen fibre_ees_log =fibre_L1*ees_centred_log
gen dist_fibre_ees_log = dist_fibre_L1 * ees_centred_log
drop init_size08_log


noi di "Generate initial size and age interactions jointly"
* Small firm: fewer than 50 employees in 2008 (missing when ees_ent_08 is missing).
gen small08 = (ees_ent_08 < 50) if !missing(ees_ent_08)

* Four size-by-age cells, defined only where both 2008 employees and age are observed.
* Age status comes from young25_08 (the 10-year cut-off).
local cell_known !missing(ees_ent_08) & !missing(age_ent)
gen small_youngA = (small08 == 1 & young25_08 == 1) if `cell_known'
gen small_oldA   = (small08 == 1 & young25_08 == 0) if `cell_known'
gen large_youngA = (small08 == 0 & young25_08 == 1) if `cell_known'
gen large_oldA   = (small08 == 0 & young25_08 == 0) if `cell_known'

* Interact fiber availability, the fiber speed proxy and cloud use with each cell.
* The two lists are parallel: the i-th suffix belongs to the i-th cell dummy.
local cell_vars  small_youngA small_oldA large_youngA large_oldA
local cell_tags  SMLYNGA SMLOLDA LRGYNGA LRGOLDA
foreach base in fibre_L1 dist_fibre_L1 cloud {
    forvalues i = 1/4 {
        local cellvar : word `i' of `cell_vars'
        local celltag : word `i' of `cell_tags'
        gen `base'_`celltag' = `base' * `cellvar'
    }
}




noi di "**********************************Label variables **************************************************************" 
* The data source of each raw variable is given in parentheses at the end of its label.
* NOTE(review): Stata variable labels are limited to 80 characters; some labels
* in this file are longer and may be truncated - confirm.

* Panel identifiers
label variable ruref "Firm-level identifier (numeric)"
label variable year  "Year"

* Turnover, employment and turnover per worker (levels and one-year differences)
label variable ln_sales_ent       "Log real enterprise turnover (from BSD)"
label variable d1_ln_sales_ent    "1 year difference in log real enterprise turnover (ln_sales_ent) (from BSD)"
label variable ln_emp_ent         "Log enterprise employment (from BSD)"
label variable d1_ln_emp_ent      "1 year difference in log enterprise employment (ln_emp_ent) (from BSD)"
label variable ees_ent_08         "Enterprise employees in 2008 (from BSD)"
label variable ln_ees_ent         "Log enterprise employees (from BSD)"
label variable d1_ln_ees_ent      "1 year difference in log enterprise employees (ln_ees_ent) (from BSD)"
label variable ln_sales_worker    "Log real enterprise turnover per unit employment = ln_sales_ent - ln_emp_ent (from BSD)"
label variable d1_ln_sales_worker "1 year difference in log real enterprise turnover per unit employment (ln_sales_worker) (from BSD)"

* Firm characteristics: relocation, age, structure and ownership.
label var ever_move				"Dummy = 1 for firms that ever move to location with different expected fiber rollout enablement"
label var birth_ent 			"Year enterprise started operations (from BSD)"
label var age_ent				"Enterprise age = year - birth_ent (from BSD)"
label var ln_age_ent			"Log (1 + age_ent) (from BSD)"
label var multiplant			"Dummy = 1 if enterprise has multiple (live) local units at time t (from BSD)"
label var fgn_ent				"Dummy = 1 if ultimate foreign owner of the enterprise is outside the UK (from BSD)"
* Young/incumbent splits at 5 and 10 years of age in 2008.
label var young_08				"Dummy = 1 if young firm in 2008 (<=5 years old) (from BSD)"
label var incumbent_08			"Dummy = 1 if incumbent firm in 2008 (>5 years old) (from BSD)"
label var young25_08			"Dummy = 1 if young firm in 2008 using p25 of age distribution (<=10 years old) (from BSD)"
label var incumbent25_08		"Dummy = 1 if incumbent firm in 2008 above p25 of age distribution (>10 years old) (from BSD)"
* Approximate age quartiles in 2008.
label var ageq1_08				"Dummy = 1 if firm is in 1st quartile for age in 2008 (from BSD)"
label var ageq2_08				"Dummy = 1 if firm is in 2nd quartile for age in 2008 (from BSD)"
label var ageq3_08				"Dummy = 1 if firm is in 3rd quartile for age in 2008 (from BSD)"
label var ageq4_08				"Dummy = 1 if firm is in 4th quartile for age in 2008 (from BSD)"
* Alternative age groups. The age ranges follow the birth-year cut-offs used to
* build the dummies: ageq3A_08 covers birth_ent 1989-1997 (ages 11-19 in 2008)
* and ageq4A_08 covers birth_ent before 1989 (age 20 or more in 2008).
label var ageq1A_08				"Dummy = 1 if firm is 5 years or younger in 2008 (from BSD)"
label var ageq2A_08				"Dummy = 1 if firm is between 6 and 10 years old in 2008 (from BSD)"
label var ageq3A_08				"Dummy = 1 if firm is between 11 and 19 years old in 2008 (from BSD)"
label var ageq4A_08				"Dummy = 1 if firm is 20 years or older in 2008 (from BSD)"

* Local-unit structure and geographic spread of the enterprise
label variable live_lu_ent     "# live local units of the enterprise (from BSD)"
label variable numla_lu_ent    "# different local authorities the enterprise has (live) local units (from BSD)"
label variable ln_numla_lu_ent "Log # different local authorities the enterprise has (live) local units = ln(numla_lu_ent) (from BSD)"
label variable live_lu_dead2   "# dead local units of the enterprise between t and t-2 (from BSD)"
label variable dead_share      "# dead local units between t and t-2 as a share of the total number of local units of the enterprise = live_lu_dead2 / live_lu_ent (from BSD)"
label variable live_lu_born2   "# born local units of the enterprise between t and t-2 (from BSD)"
label variable born_share      "# born local units between t and t-2 as a share of the total number of local units of the enterprise = live_lu_born2 / live_lu_ent (from BSD)"
label variable dist_wav_ent    "Local unit employment weighted average distance between local units and enterprise (km) (from BSD)"
label variable dist_av_ent     "Unweighted average distance between local units and enterprise (km) (from BSD)"
label variable dist_cov_lu_ent "Covariance between distance between local unit employment and distance from enterprise = dist_wav_ent - dist_av_ent (from BSD)"

* Location and sector indicators
label variable urban_ent "Enterprise is located in urban area (from BSD)"
label variable serv      "Enterprise is services sector - UK SIC 2003 2-digit codes 50-74 (from BSD)"
label variable KIA       "Enterprise is in knowledge intensive sector - UK SIC 2003 2-digit codes 23-24, 30-35, 72-73 (from BSD)"

* IT investment
label variable ln_IT_inv     "Log (1 + real investment in IT) (from ARD)"
label variable ln_IT_inv_emp "Log IT investment per employee = ln_IT_inv - ln_ees_ent "

* ICT use indicators
label variable pcs_ees         "% of employees with PCs (from Ecommerce)"
label variable internetmob_ees "% of employees with mobile internet access (from Ecommerce)"
label variable online_sales    "Dummy = 1 for positive e-commerce sales (from Ecommerce)"
label variable online_sales_pc "% e-commerce sales as a share of all sales (from Ecommerce)"
label variable rfid_ident      "Dummy = 1 for use of RFID for product identification (from Ecommerce)"
label variable rfid_prod       "Dummy = 1 for use of RFID for monitoring and control of industrial production (from Ecommerce)"
label variable erp             "Dummy = 1 for use of Enterprise Resource Planning software (from Ecommerce)"
label variable crm_mkt         "Dummy = 1 for use of Customer Relationship Management Software for marketing purposes (from Ecommerce)"
label variable crm_cust        "Dummy = 1 for use of Customer Relationship Management Software for sharing information with customers (from Ecommerce)"
label variable ict_specialists "Dummy = 1 for use of ICT specialists (from Ecommerce)"
label variable ict_training    "Dummy = 1 for provision of IT training (from Ecommerce)"
label variable scm_supp        "Dummy = 1 for use of Supply Chain Management Software for sharing information with suppliers (from Ecommerce)"
label variable scm_cust        "Dummy = 1 for use of Supply Chain Management Software for sharing information with customers (from Ecommerce)"
        
* Fiber rollout variables.
label var exchange_code_ent 		"Code of enterprise's nearest telephone exchange (string) (from OFCOM)"
label var fibre_group				"Telephone exchange's fiber rollout phase enablement group (from OFCOM)"
label var fibre_enable_year_ent		"Fiber enablement year (from OFCOM)"
label var fibre_enable_year_ent_lag	"Fiber enablement year lagged one year = 1+fibre_enable_year_ent (missing/never-enabled coded = 0) (from OFCOM)"
label var fibre_L1					"Dummy = 1 if exchange is fiber enabled at time t-1 (from OFCOM)"
label var exchange_distance_ent		"Distance of enterprise from nearest exchange (metres) (from OFCOM)"
label var dist_fibre_L1				"Fiber speed proxy - Interaction of fibre_L1 * exchange_distance_ent (from OFCOM)"

* Cloud use: overall indicator and the classifications generated earlier in this file.
label var cloud					"Dummy = 1 if firm uses any type of cloud (assume = 0 in 2008) (from Ecommerce)"
label var cloud_hard			"Dummy = 1 if firm uses cloud for hardware services (databases, storage, processing software) (from Ecommerce)"
label var cloud_soft			"Dummy = 1 if firm uses cloud for software services (email, software, crm or finance) (from Ecommerce)"
* cloud_ds (cloud_data or cloud_storage) is generated with the other
* classifications and previously had no label in the saved dataset.
label var cloud_ds				"Dummy = 1 if firm uses cloud for data hosting or file storage (from Ecommerce)"
label var cloud_low				"Dummy = 1 if firm uses cloud for low-tech services (Eurostat, 2018) (from Ecommerce)"
label var cloud_medium			"Dummy = 1 if firm uses cloud for medium-tech services (Eurostat, 2018) (from Ecommerce)"
label var cloud_high			"Dummy = 1 if firm uses cloud for high-tech services (Eurostat, 2018) (from Ecommerce)"

* Individual cloud services.
label var cloud_ownsoftware		"Dummy = 1 if firm uses cloud computing capacity to process own software (assume = 0 in 2008) (from Ecommerce)"
label var cloud_storage			"Dummy = 1 if firm uses cloud services for storage of files (assume = 0 in 2008) (from Ecommerce)"
label var cloud_data			"Dummy = 1 if firm uses cloud services for hosting of databases (assume = 0 in 2008) (from Ecommerce)"
label var cloud_crm				"Dummy = 1 if firm uses cloud services to access CRM software (assume = 0 in 2008) (from Ecommerce)"
label var cloud_software		"Dummy = 1 if firm uses cloud services to access Office software (assume = 0 in 2008) (from Ecommerce)"
label var cloud_finance			"Dummy = 1 if firm uses cloud services to access finance or accounting software (assume = 0 in 2008) (from Ecommerce)"
label var cloud_email			"Dummy = 1 if firm uses cloud services to access email (assume = 0 in 2008) (from Ecommerce)"

* Interactions with the incumbent / young indicators.
label var cloud_inc					"Cloud use for incumbent firms (>5 years old) - interaction of cloud * incumbent_08"
label var fibre_L1_inc				"Fiber availability for incumbent firms (>5 years old) - Interaction of fibre_L1 * incumbent_08"
label var dist_fibre_L1_inc			"Fiber speed proxy for incumbent firms (>5 years old) - Interaction of dist_fibre_L1 * incumbent_08"
label var cloud_inc25				"Cloud use for incumbent firms (defined as > p25 age) - Interaction of cloud * incumbent25_08"
label var cloud_yng25				"Cloud use for young firms (defined as <= p25 age) - Interaction of cloud * young25_08"

* Interactions with the (approximate) age-quartile dummies.
label var cloud_ageq1_08			"Cloud use for 1st age quartile firms - Interaction of cloud * ageq1_08"
label var cloud_ageq2_08			"Cloud use for 2nd age quartile firms - Interaction of cloud * ageq2_08"
label var cloud_ageq3_08			"Cloud use for 3rd age quartile firms -Interaction of cloud * ageq3_08"
label var cloud_ageq4_08			"Cloud use for 4th age quartile firms - Interaction of cloud * ageq4_08"
label var fibre_L1_ageq1_08			"Fiber availability for 1st age quartile firms - Interaction of fibre_L1 * ageq1_08"
label var fibre_L1_ageq2_08			"Fiber availability for 2nd age quartile firms - Interaction of fibre_L1 * ageq2_08"
label var fibre_L1_ageq3_08			"Fiber availability for 3rd age quartile firms - Interaction of fibre_L1 * ageq3_08"
label var fibre_L1_ageq4_08			"Fiber availability for 4th age quartile firms - Interaction of fibre_L1 * ageq4_08"
label var dist_fibre_L1_ageq1_08	"Fiber speed proxy for 1st age quartile firms - Interaction of dist_fibre_L1 * ageq1_08"
label var dist_fibre_L1_ageq2_08	"Fiber speed proxy for 2nd age quartile firms - Interaction of dist_fibre_L1 * ageq2_08"
label var dist_fibre_L1_ageq3_08	"Fiber speed proxy for 3rd age quartile firms - Interaction of dist_fibre_L1 * ageq3_08"
label var dist_fibre_L1_ageq4_08	"Fiber speed proxy for 4th age quartile firms - Interaction of dist_fibre_L1 * ageq4_08"

* Interactions with the alternative age-group dummies (the "A" variables).
* The last three statements must name the ageq2A/3A/4A variables: labelling the
* non-A names here would overwrite the quartile labels set just above and leave
* the A variables without a label.
label var cloud_ageq1A_08			"Interaction of cloud * ageq1A_08"
label var cloud_ageq2A_08			"Interaction of cloud * ageq2A_08"
label var cloud_ageq3A_08			"Interaction of cloud * ageq3A_08"
label var cloud_ageq4A_08			"Interaction of cloud * ageq4A_08"
label var fibre_L1_ageq1A_08		"Interaction of fibre_L1 * ageq1A_08"
label var fibre_L1_ageq2A_08		"Interaction of fibre_L1 * ageq2A_08"
label var fibre_L1_ageq3A_08		"Interaction of fibre_L1 * ageq3A_08"
label var fibre_L1_ageq4A_08		"Interaction of fibre_L1 * ageq4A_08"
label var dist_fibre_L1_ageq1A_08	"Interaction of dist_fibre_L1 * ageq1A_08"
label var dist_fibre_L1_ageq2A_08	"Interaction of dist_fibre_L1 * ageq2A_08"
label var dist_fibre_L1_ageq3A_08	"Interaction of dist_fibre_L1 * ageq3A_08"
label var dist_fibre_L1_ageq4A_08	"Interaction of dist_fibre_L1 * ageq4A_08"

* Mean-centred initial age and its interactions.
label var age_centred				"Mean centered continuous firm age in 2008"
label var treat_age					"Interaction of cloud * age_centred"
label var fibre_age					"Interaction of fibre_L1 * age_centred"
label var dist_fibre_age			"Interaction of dist_fibre_L1 * age_centred"

* Mean-centred initial (log) size and its interactions.
label var ees_centred_log			"Mean centered continuous ln_ees_ent in 2008"
label var treat_ees_log				"Interaction of cloud * ees_centred_log"
label var fibre_ees_log				"Interaction of fibre_L1 * ees_centred_log"
label var dist_fibre_ees_log		"Interaction of dist_fibre_L1 * ees_centred_log"

* Size-by-age cells.
label var small08					"Small firm - initial employees (ees_ent_08) of less than 50"
label var small_youngA				"Small and young firm - interaction of small08 * young25_08"
label var small_oldA				"Small and incumbent firm - interaction of small08 * (1- young25_08)"
label var large_youngA				"Large and young firm - interaction of (1 - small08) * young25_08"
label var large_oldA				"Large and old firm - interaction of (1 - small08) * (1 - young25_08)"

label var cloud_SMLYNGA				"Cloud use for small and young firm - Interaction of cloud * small_youngA"
label var cloud_SMLOLDA				"Cloud use for small and old firm - Interaction of cloud * small_oldA"
label var cloud_LRGYNGA				"Cloud use for large and young firm - Interaction of cloud * large_youngA"
label var cloud_LRGOLDA				"Cloud use for large and old firm - Interaction of cloud * large_oldA"

label var fibre_L1_SMLYNGA			"Fiber availability for small and young firm - Interaction of fibre_L1 * small_youngA"
label var fibre_L1_SMLOLDA			"Fiber availability for small and old firm - Interaction of fibre_L1 * small_oldA"
label var fibre_L1_LRGYNGA			"Fiber availability for large and young firm - Interaction of fibre_L1 * large_youngA"
label var fibre_L1_LRGOLDA			"Fiber availability for large and old firm - Interaction of fibre_L1 * large_oldA"

* The label text names the variable actually used in the interaction, dist_fibre_L1.
label var dist_fibre_L1_SMLYNGA		"Fiber speed proxy for small and young firm - Interaction of dist_fibre_L1 * small_youngA"
label var dist_fibre_L1_SMLOLDA		"Fiber speed proxy for small and old firm - Interaction of dist_fibre_L1 * small_oldA"
label var dist_fibre_L1_LRGYNGA		"Fiber speed proxy for large and young firm - Interaction of dist_fibre_L1 * large_youngA"
label var dist_fibre_L1_LRGOLDA		"Fiber speed proxy for large and old firm - Interaction of dist_fibre_L1 * large_oldA"

	





noi di "**********************************Save data **************************************************************" 

* Reduce each variable to the smallest storage type that holds its values, then
* write the final dataset to the folder held in the global saveddata, replacing
* any existing file of the same name.
compress
save  "$saveddata/RESTAT_replication.dta", replace		



